# Setup: working directory, knitr chunk options, and library imports.
# NOTE(review): setwd() with an absolute path only works on machines with the
# lab share mounted -- consider project-relative paths, but left unchanged
# here since downstream relative reads depend on it.
setwd("/Volumes/turnbaughlab/labshare/vaibhav_metastudy_Mar2017/pubmedsearch/")
# Render code and messages in the report; suppress warnings; no caching.
knitr::opts_chunk$set(echo = TRUE, message=TRUE, warning=FALSE, tidy=FALSE, cache=FALSE)
library(readr)    # read_tsv / read_csv / parse_time / write_csv
library(ggplot2)  # all plots below
library(plotly)   # not used in the visible chunks -- TODO confirm needed
library(stringr)  # str_split
library(MicrobeR) # lab-internal package -- usage not visible here, TODO confirm
library(reshape2) # melt
library(knitr)    # kable
library(plyr)     # ddply / summarize
library(ape)      # not used in the visible chunks -- TODO confirm needed
Data Import
# Number of reviewers; bump this when a new reviewer file is added
# (files are named <id>_decisions.txt).
NREV <- 12

# Read every reviewer's decision table into a named list:
# Reviews$Reviewer_1 ... Reviews$Reviewer_12.
reviewer_ids <- seq_len(NREV)
Reviews <- lapply(reviewer_ids, function(id) {
  suppressMessages(read_tsv(paste0("Aug24_2017_reviews/", id, "_decisions.txt")))
})
names(Reviews) <- paste0("Reviewer_", reviewer_ids)
Reviewer Stats
First generate some stats to plot out.
# Per-reviewer summary statistics: acceptance rate and review timing.
#
# Timestamps are parsed from the 4th space-separated field of TimeStamp
# (assumed "HH:MM:SS"). NOTE(review): a session spanning midnight would
# produce negative diffs -- confirm all reviews happened within one day.
parse_review_times <- function(x) {
  parse_time(str_split(x$TimeStamp, " ", simplify = TRUE)[, 4])
}

ReviewerStats <- data.frame(ReviewerID = names(Reviews), stringsAsFactors = FALSE)

# Percent of abstracts each reviewer marked "Include".
ReviewerStats$AcceptanceRate <- sapply(Reviews, function(x) {
  100 * sum(x$Decision == "Include") / length(x$Decision)
})

# Parse each reviewer's timestamps ONCE (the original re-parsed them for
# every metric), then derive all timing columns from the shared result.
TimesList <- lapply(Reviews, parse_review_times)

# Wall-clock span from first to last decision, in minutes.
ReviewerStats$TotalTime <- sapply(TimesList, function(times) {
  as.numeric(difftime(times[length(times)], times[1], units = "mins"))
})

# Gaps between consecutive decisions. diff() on an hms vector already yields
# seconds; the original's units="seconds" argument was silently ignored.
GapsList <- lapply(TimesList, function(times) as.numeric(diff(times)))

ReviewerStats$AverageTime <- sapply(GapsList, mean)
ReviewerStats$MedianTime  <- sapply(GapsList, median)
ReviewerStats$MinTime     <- sapply(GapsList, min)
ReviewerStats$MaxTime     <- sapply(GapsList, max)
Most accepting and least accepting
# Rank reviewers from most to least accepting, then plot acceptance rate.
ReviewerStats <- ReviewerStats[order(ReviewerStats$AcceptanceRate, decreasing = TRUE), ]
acceptance_order <- ReviewerStats$ReviewerID
ggplot(ReviewerStats,
       aes(x = factor(ReviewerID, levels = acceptance_order), y = AcceptanceRate)) +
  geom_col() +  # identical to geom_bar(stat = "identity")
  theme_bw() +
  labs(x = "Reviewer ID",
       y = "Acceptance Rate %",
       title = "Overall Acceptance Rate per Reviewer") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(0, 30), breaks = seq(0, 30, 5))

# Distribution of acceptance rates across reviewers (frequency polygon,
# 4-percentage-point bins).
ggplot(ReviewerStats, aes(x = AcceptanceRate)) +
  geom_freqpoly(binwidth = 4) +
  theme_bw() +
  labs(x = "Acceptance Rate %",
       y = "Number of Reviewers",
       title = "Overall Acceptance Rate per Reviewer") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

The award for biggest hater goes to lucky Reviewer 7 at 10.95% and the most accepting to Reviewer 4 at 27.40%
Fastest and slowest
# Order by total completion time. Reviewer 12 took breaks mid-review, which
# inflates their wall-clock total, so they are dropped from this plot.
ReviewerStats <- ReviewerStats[order(ReviewerStats$TotalTime, decreasing = TRUE), ]
total_time_order <- ReviewerStats$ReviewerID
ggplot(subset(ReviewerStats, ReviewerID != "Reviewer_12"),
       aes(x = factor(ReviewerID, levels = total_time_order), y = TotalTime)) +
  geom_col() +  # identical to geom_bar(stat = "identity")
  theme_bw() +
  labs(x = "Reviewer ID",
       y = "Time for all reviews (min)",
       title = "Total time for completion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Long format for the per-abstract timing metrics (Average/Median/Min/Max),
# again excluding Reviewer 12 (breaks during review).
ReviewerStats <- ReviewerStats[order(ReviewerStats$AverageTime, decreasing = TRUE), ]
toplot <- melt(subset(ReviewerStats, ReviewerID != "Reviewer_12"),
               id.vars = c("ReviewerID"),
               variable.name = "Metric",
               value.name = "Time")
# Keep only the per-abstract timing metrics.
toplot <- subset(toplot, !(Metric %in% c("AcceptanceRate", "TotalTime")))
ggplot(toplot,
       aes(x = factor(ReviewerID, levels = ReviewerStats$ReviewerID),
           y = Time, group = Metric, color = Metric)) +
  geom_line() +
  theme_bw() +
  labs(x = "Reviewer ID",
       y = "Time (seconds)",
       title = "Time spent per abstract") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Reviewer 7 took the most time and care in their reviews while 11 was the most rushed. But how does this compare to acceptance rate?
# Does reviewing faster go with accepting more? Scatter of average time per
# review vs acceptance rate, labeled by reviewer (Reviewer 12 excluded).
ggplot(subset(ReviewerStats, ReviewerID != "Reviewer_12"),
       aes(x = AverageTime, y = AcceptanceRate,
           label = ReviewerID, color = ReviewerID)) +
  geom_point() +
  geom_text(vjust = 1, hjust = 0) +
  theme_bw() +
  labs(x = "Average time per review (sec)",
       y = "Acceptance Rate (%)",
       title = "Relationship between Time and Acceptance") +
  theme(legend.position = "none") +
  scale_x_continuous(expand = c(0, 4))

# Pearson correlation between average review time and acceptance rate
# (n = 12 reviewers; the output below shows r = -0.22, p = 0.498,
# i.e. not statistically significant).
cor.test(ReviewerStats$AverageTime, ReviewerStats$AcceptanceRate, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: ReviewerStats$AverageTime and ReviewerStats$AcceptanceRate
## t = -0.70313, df = 10, p-value = 0.498
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7033390 0.4076294
## sample estimates:
## cor
## -0.2170483
The correlation between time spent reviewing and acceptance rate is negative but weak and not statistically significant (r = -0.22, p = 0.498), so there is no clear connection between the two.
Degree of consensus
# Master assignment sheet: each PMID was assigned to two reviewers (A and B).
MasterList <- suppressMessages(read_csv("ReviewAbstracts/AssignedReviewers.csv"))

# Look up one reviewer's Include/Exclude decision for a given abstract.
# Shared by both columns below (the original repeated this lambda twice).
lookup_decision <- function(pmid, rev) {
  subset(Reviews[[paste0("Reviewer_", rev)]], PMID == pmid)$Decision
}

MasterList$ReviewerA_Decision <- mapply(lookup_decision,
                                        MasterList$PMID, MasterList$ReviewerA)
MasterList$ReviewerB_Decision <- mapply(lookup_decision,
                                        MasterList$PMID, MasterList$ReviewerB)
# Tally paired decisions into consensus/conflict counts.
# The original stored the table in `c`, shadowing base::c() while still
# calling c(...) in the same expression; renamed to avoid that footgun.
pair_counts <- table(paste(MasterList$ReviewerA_Decision, MasterList$ReviewerB_Decision))

# Safe lookup: a pairing absent from the table counts as 0 rather than NA
# (the original's pair_counts["Exclude Include"] would be NA if no such pair
# occurred, propagating NA into the Frequency column).
count_of <- function(key) {
  if (key %in% names(pair_counts)) pair_counts[[key]] else 0L
}

consensus <-
  data.frame(Decision = c("Consensus on Exclude", "Consensus on Include", "Conflicting Opinion"),
             Frequency = c(count_of("Exclude Exclude"),
                           count_of("Include Include"),
                           count_of("Exclude Include") + count_of("Include Exclude"))
  )
rownames(consensus) <- NULL
kable(consensus)
| Decision             | Frequency |
|----------------------|-----------|
| Consensus on Exclude | 317 |
| Consensus on Include | 45 |
| Conflicting Opinion  | 64 |
# Percentage breakdown of consensus outcomes, drawn as a pie chart
# (a single stacked bar transformed to polar coordinates).
consensus$Percentage <- round(100 * consensus$Frequency / sum(consensus$Frequency))
ggplot(consensus, aes(x = "", y = Percentage, fill = Decision)) +
  geom_col() +  # identical to geom_bar(stat = "identity")
  coord_polar("y", start = 0) +
  theme_minimal()

# Classify each abstract by its pair of decisions:
#   both Include -> "Consensus_Include", both Exclude -> "Consensus_Exclude",
#   any mixed pair -> "Conflicting".
# Vectorized ifelse() replaces the row-wise apply(), which coerced the whole
# data.frame to a character matrix (fragile and slow).
MasterList$FinalOpinion <- ifelse(
  MasterList$ReviewerA_Decision == "Include" & MasterList$ReviewerB_Decision == "Include",
  "Consensus_Include",
  ifelse(
    MasterList$ReviewerA_Decision == "Exclude" & MasterList$ReviewerB_Decision == "Exclude",
    "Consensus_Exclude",
    "Conflicting"
  )
)
Extent of disagreement by person
#dis<-as.data.frame(t(combn(unique(MasterList$ReviewerA), 2)))
# Build every ordered (Reviewer1, Reviewer2) pairing. expand.grid includes
# self-pairs (which match zero rows) and BOTH orders of each pair -- that is
# intentional: when collapsed per Reviewer1 below, each reviewer's totals
# then cover all of their partners exactly once.
dis<-expand.grid(unique(MasterList$ReviewerA), unique(MasterList$ReviewerB))
colnames(dis)<-c("Reviewer1", "Reviewer2")
# Number of abstracts the two reviewers shared, in either assignment order.
dis$N.overlap<-apply(dis, 1, function(x) {
nrow( subset(MasterList, (ReviewerA==x[1] & ReviewerB==x[2]) | (ReviewerA==x[2] & ReviewerB==x[1]) ) )
})
# Of those shared abstracts, how many ended in conflicting decisions.
dis$N.conflict<-apply(dis, 1, function(x) {
nrow( subset(MasterList, FinalOpinion=="Conflicting" & ((ReviewerA==x[1] & ReviewerB==x[2]) | (ReviewerA==x[2] & ReviewerB==x[1])) ) )
})
# Keep the raw pairwise metrics before collapsing to per-reviewer totals.
dismets <- dis
# Sum conflicts and overlaps for each reviewer across all partners.
dis <- ddply(dis, "Reviewer1", summarize,
             N.conflict = sum(N.conflict), N.overlap = sum(N.overlap))
dis$Reviewer1 <- paste0("Reviewer_", dis$Reviewer1)
dis$Percent.conflict <- round(100 * (dis$N.conflict / dis$N.overlap), 2)
# Rank 1 = lowest conflict percentage (most agreed-with reviewer).
dis <- dis[order(dis$Percent.conflict), ]
dis$Rank <- seq_len(nrow(dis))
kable(dis)
|    | Reviewer1   | N.conflict | N.overlap | Percent.conflict | Rank |
|----|-------------|------------|-----------|------------------|------|
| 7  | Reviewer_7  | 7  | 73 | 9.59  | 1  |
| 3  | Reviewer_3  | 7  | 71 | 9.86  | 2  |
| 12 | Reviewer_12 | 8  | 72 | 11.11 | 3  |
| 5  | Reviewer_5  | 9  | 71 | 12.68 | 4  |
| 6  | Reviewer_6  | 10 | 72 | 13.89 | 5  |
| 1  | Reviewer_1  | 10 | 71 | 14.08 | 6  |
| 10 | Reviewer_10 | 12 | 72 | 16.67 | 7  |
| 2  | Reviewer_2  | 12 | 71 | 16.90 | 8  |
| 11 | Reviewer_11 | 12 | 69 | 17.39 | 9  |
| 8  | Reviewer_8  | 12 | 68 | 17.65 | 10 |
| 9  | Reviewer_9  | 14 | 69 | 20.29 | 11 |
| 4  | Reviewer_4  | 15 | 73 | 20.55 | 12 |
Reviewer 7 was the most agreed-upon reviewer, while Reviewer 4 had the most disagreement.
Finalized Decisions
# Persist the merged decision table (assignments + both decisions +
# FinalOpinion). NOTE(review): the filename misspells "Consensus"; left
# unchanged because downstream consumers may reference this exact path --
# confirm before renaming.
write_csv(MasterList, "Concensus_table.csv")